### Replication code for "Family Matters?"
### Summer 2018
### See the readme file for more details on what goes where in this replication package
### Contact Ariel White with questions: arwhi@mit.edu

#pull in the 'full file' and make Table 1 and also "Naive OLS" Table SI.1

library(data.table)
library(lubridate)
# Load the replication data. The rest of this script uses a data.table named
# `wholefile`, which is presumably supplied by this .Rdata file (confirm
# against the readme).
load("households_plusfullfile_deidentified.Rdata")

# Date of the 2012 general election; only first cases strictly before this
# date count as exposure.
electionday2012 <- ymd("2012-11-06")

# Three mutually exclusive household-exposure indicators, all starting at 0:
#   charge1 - first case before the election, no conviction
#   conv1   - first case before the election, convicted, no jail
#   jail1   - first case before the election, convicted and jailed
# Rows whose condition evaluates to NA (e.g. missing `firstcase`) are not
# selected by data.table and therefore keep the value 0.
wholefile[, `:=`(charge1 = 0, conv1 = 0, jail1 = 0)]
wholefile[firstcase < electionday2012 & anyconv == 0, charge1 := 1]
wholefile[firstcase < electionday2012 & anyconv == 1 & anyjail == 0, conv1 := 1]
wholefile[firstcase < electionday2012 & anyconv == 1 & anyjail == 1, jail1 := 1]

# Sanity checks: size of each group, then the combined count.
wholefile[, sum(charge1)]
wholefile[, sum(conv1)]
wholefile[, sum(jail1)]

wholefile[, sum(charge1, conv1, jail1)]

# Naive OLS of 2012 turnout on the three exposure indicators.
# The omitted category is "HH member not arrested".
w2 <- lm(vote2012 ~ charge1 + conv1 + jail1, data = wholefile)
summary(w2)

# Same model, adding prior turnout (2004, 2006, 2008) and voter sex and age.
# The order of the terms matters: the covariate labels of Table SI.1 are
# matched to coefficients by position.
w2c <- lm(
  vote2012 ~ charge1 + conv1 + jail1 +
    vote2004 + vote2006 + vote2008 + voter_male + voter_age,
  data = wholefile
)
summary(w2c)

# Restrict to 2012 exposure by dropping everyone with an earlier first case.
# Rows with no case at all (NA `firstcase`) are kept. The comparison is
# strict, so a first case dated exactly 2012-01-01 is dropped as well.
jan2012 <- ymd("2012-01-01")
wholefile1 <- wholefile[is.na(firstcase) | firstcase > jan2012]
dim(wholefile1)

w3c <- lm(
  vote2012 ~ charge1 + conv1 + jail1 +
    vote2004 + vote2006 + vote2008 + voter_male + voter_age,
  data = wholefile1
)
summary(w3c)

# Note: these models count people who are only treated later (post-election)
# as untreated, which seems reasonable for this approach.
# Table SI.1: naive OLS estimates from the three models (w2, w2c, w3c),
# written as LaTeX to the file given in `out`.
library(stargazer)
stargazer(
  w2, w2c, w3c,
  label = "naiveols09_charge",
  align = TRUE, # spelled out: `T` is an ordinary variable that can be reassigned
  omit.stat = c("LL", "ser", "f"),
  title = "Basic OLS estimates, including prior vote and voter characteristics",
  # Labels are applied by position, so they must follow the order of the
  # terms in the model formulas.
  covariate.labels = c(
    "HH Member Arrested and Charged",
    "HH Member Convicted",
    "HH Member Sentenced to Jail",
    "2004 Turnout",
    "2006 Turnout",
    "2008 Turnout",
    "Voter Male",
    "Voter Age (Years)"
  ),
  dep.var.labels = c("Voted 2012"),
  out = "smallneighbors09charge_naiveols.tex",
  # A single significance cutoff, with a custom note that replaces the
  # default star legend instead of being appended to it.
  star.cutoffs = c(0.05),
  notes = "$^{*}$p$<$0.05",
  notes.append = FALSE
)


# Table 1 (descriptive): prepare the two groups to compare.

# Re-stated here so this section can be run on its own.
electionday2012 <- ymd("2012-11-06")

# Parse the registration date, then compute how old the voter record was on
# election day 2012, in years. The Date difference is converted with explicit
# units so the result is always days / 365.25.
wholefile[, regdate := ymd(as.character(registered_at))]
wholefile[, voter_filetenure := as.numeric(electionday2012 - regdate, units = "days") / 365.25]

# Trim to the main sample for comparison: rows with a non-missing `firstcase`.
# The subset is taken after `regdate` and `voter_filetenure` were added above,
# so it already carries both columns; they are row-wise functions of
# `registered_at` and do not need to be recomputed on the sample.
smallneighborsfull2 <- wholefile[!is.na(firstcase)]
dim(smallneighborsfull2)

library(xtable)

# Columns summarised in Table 1, in the row order of the printed table.
compare_vars <- c(
  "vote2012", "vote2010", "vote2008", "vote2006", "vote2004",
  "voter_age", "voter_male", "voter_filetenure"
)

# Mean of each Table 1 column in `dt`, ignoring missing values.
column_means <- function(dt) {
  vapply(
    compare_vars,
    function(v) mean(dt[[v]], na.rm = TRUE),
    numeric(1),
    USE.NAMES = FALSE
  )
}

# First column: the analysis sample; second column: the full voter file
# (which includes the sample).
comparetab <- cbind(
  column_means(smallneighborsfull2),
  column_means(wholefile)
)

rownames(comparetab) <- c(
  "Voter Turnout 2012",
  "Prior Voter Turnout (2010)",
  "Prior Voter Turnout (2008)",
  "Prior Voter Turnout (2006)",
  "Prior Voter Turnout (2004)",
  "Mean Age (Years)",
  "Proportion Male",
  "Mean Time Registered (Years)"
)
colnames(comparetab) <- c("Proximal Contact Sample", "All Voters")
comparetab

# `include.rownames` is an argument of print.xtable(), not xtable(); passed to
# xtable() it is silently swallowed by `...`, so it is given to print() here.
print(
  xtable(
    comparetab,
    label = "desccompare",
    caption = "Comparing the sample used in this paper to the full set of registered voters in Harris County."
  ),
  include.rownames = TRUE,
  file = "sample_voterfile_compare_march2017.tex"
)


